In [64]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import Imputer

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

from sklearn import metrics
from sklearn.metrics import roc_curve,roc_auc_score

from sklearn import svm

from sklearn import model_selection

from sklearn.model_selection import RandomizedSearchCV
from pprint import pprint
In [65]:
# Load the vehicle dataset and preview the first few rows.
data = pd.read_csv('vehicle.csv')
data.head()
Out[65]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
In [66]:
# Column dtypes and non-null counts — several float columns have missing values
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 19 columns):
compactness                    846 non-null int64
circularity                    841 non-null float64
distance_circularity           842 non-null float64
radius_ratio                   840 non-null float64
pr.axis_aspect_ratio           844 non-null float64
max.length_aspect_ratio        846 non-null int64
scatter_ratio                  845 non-null float64
elongatedness                  845 non-null float64
pr.axis_rectangularity         843 non-null float64
max.length_rectangularity      846 non-null int64
scaled_variance                843 non-null float64
scaled_variance.1              844 non-null float64
scaled_radius_of_gyration      844 non-null float64
scaled_radius_of_gyration.1    842 non-null float64
skewness_about                 840 non-null float64
skewness_about.1               845 non-null float64
skewness_about.2               845 non-null float64
hollows_ratio                  846 non-null int64
class                          846 non-null object
dtypes: float64(14), int64(4), object(1)
memory usage: 125.7+ KB
In [67]:
#We see that many columns have null values in them, so we need to deal with them.
In [68]:
# Summary statistics for the numeric columns (count row confirms the missing values)
data.describe()
Out[68]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
count 846.000000 841.000000 842.000000 840.000000 844.000000 846.000000 845.000000 845.000000 843.000000 846.000000 843.000000 844.000000 844.000000 842.000000 840.000000 845.000000 845.000000 846.000000
mean 93.678487 44.828775 82.110451 168.888095 61.678910 8.567376 168.901775 40.933728 20.582444 147.998818 188.631079 439.494076 174.709716 72.447743 6.364286 12.602367 188.919527 195.632388
std 8.234474 6.152172 15.778292 33.520198 7.891463 4.601217 33.214848 7.816186 2.592933 14.515652 31.411004 176.666903 32.584808 7.486190 4.920649 8.936081 6.155809 7.438797
min 73.000000 33.000000 40.000000 104.000000 47.000000 2.000000 112.000000 26.000000 17.000000 118.000000 130.000000 184.000000 109.000000 59.000000 0.000000 0.000000 176.000000 181.000000
25% 87.000000 40.000000 70.000000 141.000000 57.000000 7.000000 147.000000 33.000000 19.000000 137.000000 167.000000 318.000000 149.000000 67.000000 2.000000 5.000000 184.000000 190.250000
50% 93.000000 44.000000 80.000000 167.000000 61.000000 8.000000 157.000000 43.000000 20.000000 146.000000 179.000000 363.500000 173.500000 71.500000 6.000000 11.000000 188.000000 197.000000
75% 100.000000 49.000000 98.000000 195.000000 65.000000 10.000000 198.000000 46.000000 23.000000 159.000000 217.000000 587.000000 198.000000 75.000000 9.000000 19.000000 193.000000 201.000000
max 119.000000 59.000000 112.000000 333.000000 138.000000 55.000000 265.000000 61.000000 29.000000 188.000000 320.000000 1018.000000 268.000000 135.000000 22.000000 41.000000 206.000000 211.000000

DATA PREPROCESSING

Univariate Analysis

In [69]:
#Replacing null values in all columns with mean/median
In [70]:
# Impute missing 'circularity' values with the column mean.
# NOTE(review): imputation happens before the train/test split, so test-set
# statistics leak into the fill value — acceptable for EDA, worth confirming.
data['circularity'].isnull().sum()
data['circularity'] = data['circularity'].fillna(data['circularity'].mean())
In [71]:
# Impute missing 'distance_circularity' values with the column mean.
data['distance_circularity'].isnull().sum()
data['distance_circularity'] = data['distance_circularity'].fillna(data['distance_circularity'].mean())
In [72]:
# Impute missing 'radius_ratio' values with the column mean.
data['radius_ratio'].isnull().sum()
data['radius_ratio'] = data['radius_ratio'].fillna(data['radius_ratio'].mean())
In [73]:
# Impute missing 'pr.axis_aspect_ratio' values with the column mean.
data['pr.axis_aspect_ratio'].isnull().sum()
data['pr.axis_aspect_ratio'] = data['pr.axis_aspect_ratio'].fillna(data['pr.axis_aspect_ratio'].mean())
In [74]:
# Impute missing 'scatter_ratio' values with the column mean.
data['scatter_ratio'].isnull().sum()
data['scatter_ratio'] = data['scatter_ratio'].fillna(data['scatter_ratio'].mean())
In [75]:
# Impute missing 'elongatedness' values with the column mean.
data['elongatedness'].isnull().sum()
data['elongatedness'] = data['elongatedness'].fillna(data['elongatedness'].mean())
In [76]:
# Impute missing 'pr.axis_rectangularity' values with the column mean.
data['pr.axis_rectangularity'].isnull().sum()
data['pr.axis_rectangularity'] = data['pr.axis_rectangularity'].fillna(data['pr.axis_rectangularity'].mean())
In [77]:
# Impute missing 'scaled_variance' values with the column mean.
data['scaled_variance'].isnull().sum()
data['scaled_variance'] = data['scaled_variance'].fillna(data['scaled_variance'].mean())
In [78]:
# Impute missing 'scaled_variance.1' values — this column uses the median
# (the only median-imputed column in the notebook).
data['scaled_variance.1'].isnull().sum()
data['scaled_variance.1'] = data['scaled_variance.1'].fillna(data['scaled_variance.1'].median())
In [79]:
# Impute missing 'scaled_radius_of_gyration' values with the column mean.
data['scaled_radius_of_gyration'].isnull().sum()
data['scaled_radius_of_gyration'] = data['scaled_radius_of_gyration'].fillna(data['scaled_radius_of_gyration'].mean())
In [80]:
# Impute missing 'scaled_radius_of_gyration.1' values with the column mean.
data['scaled_radius_of_gyration.1'].isnull().sum()
data['scaled_radius_of_gyration.1'] = data['scaled_radius_of_gyration.1'].fillna(data['scaled_radius_of_gyration.1'].mean())
In [81]:
# Impute missing 'skewness_about' values with the column mean.
data['skewness_about'].isnull().sum()
data['skewness_about'] = data['skewness_about'].fillna(data['skewness_about'].mean())
In [82]:
# Impute missing 'skewness_about.1' values with the column mean.
data['skewness_about.1'].isnull().sum()
data['skewness_about.1'] = data['skewness_about.1'].fillna(data['skewness_about.1'].mean())
In [83]:
# Impute missing 'skewness_about.2' values with the column mean.
data['skewness_about.2'].isnull().sum()
data['skewness_about.2'] = data['skewness_about.2'].fillna(data['skewness_about.2'].mean())
In [84]:
# Verify that no missing values remain after imputation
data.isnull().sum()
Out[84]:
compactness                    0
circularity                    0
distance_circularity           0
radius_ratio                   0
pr.axis_aspect_ratio           0
max.length_aspect_ratio        0
scatter_ratio                  0
elongatedness                  0
pr.axis_rectangularity         0
max.length_rectangularity      0
scaled_variance                0
scaled_variance.1              0
scaled_radius_of_gyration      0
scaled_radius_of_gyration.1    0
skewness_about                 0
skewness_about.1               0
skewness_about.2               0
hollows_ratio                  0
class                          0
dtype: int64
In [85]:
# One boxplot per numeric feature to inspect outliers.
# Iterating over the numeric columns replaces 17 copy-pasted subplot calls and
# also fixes an omission: the hand-coded list skipped 'radius_ratio', which is
# later treated for outliers. All 18 numeric columns fit the 6x3 grid.
numeric_cols = data.select_dtypes(include=np.number).columns
plt.figure(figsize=(30, 40))
for i, col in enumerate(numeric_cols, start=1):
    plt.subplot(6, 3, i)
    sns.boxplot(data[col])
Out[85]:
<matplotlib.axes._subplots.AxesSubplot at 0x27e7d518>
In [86]:
# All features in a single boxplot figure for a quick outlier overview
data.boxplot(figsize=(35,20))
Out[86]:
<matplotlib.axes._subplots.AxesSubplot at 0x29878a58>
In [87]:
#We see that the columns pr.axis_aspect_ratio,max.length_aspect_ratio,scaled_radius_of_gyration.1,skewness_about,radius_ratio are largely affected by the outliers.
In [88]:
# Class counts per pr.axis_aspect_ratio value — used to choose an outlier cap
pd.crosstab(data['pr.axis_aspect_ratio'],data['class'])
Out[88]:
class bus car van
pr.axis_aspect_ratio
47.00000 2 0 0
48.00000 1 0 3
49.00000 1 0 2
50.00000 3 1 1
51.00000 7 1 3
52.00000 3 2 9
53.00000 8 13 6
54.00000 12 16 10
55.00000 7 19 11
56.00000 13 28 16
57.00000 7 29 8
58.00000 4 22 17
59.00000 6 43 15
60.00000 5 33 8
61.00000 8 26 8
61.67891 1 1 0
62.00000 6 42 10
63.00000 4 28 13
64.00000 17 30 22
65.00000 13 20 5
66.00000 10 17 10
67.00000 7 16 5
68.00000 12 15 7
69.00000 13 10 2
70.00000 10 7 1
71.00000 10 5 0
72.00000 7 2 1
73.00000 6 1 0
74.00000 7 2 0
75.00000 5 0 0
76.00000 1 0 0
97.00000 0 0 1
102.00000 0 0 1
103.00000 1 0 0
105.00000 0 0 1
126.00000 1 0 1
133.00000 0 0 1
138.00000 0 0 1
In [89]:
# From 76 upward the class pattern stays the same, so cap the outliers at 76.
# clip(upper=76) is equivalent to np.where(x > 76, 76, x).
data['pr.axis_aspect_ratio'] = data['pr.axis_aspect_ratio'].clip(upper=76)
In [90]:
# Class counts per max.length_aspect_ratio value — used to choose an outlier cap
pd.crosstab(data['max.length_aspect_ratio'],data['class'])
Out[90]:
class bus car van
max.length_aspect_ratio
2 0 0 1
3 0 2 2
4 7 9 2
5 26 15 10
6 78 35 19
7 80 62 26
8 22 58 33
9 0 60 34
10 0 80 32
11 0 78 30
12 0 27 3
13 0 3 0
19 1 0 0
22 2 0 0
25 0 0 1
43 0 0 1
46 0 0 1
48 0 0 1
49 0 0 2
52 2 0 0
55 0 0 1
In [91]:
# From 19 upward the class pattern stays the same, so cap the outliers at 19.
# clip(upper=19) is equivalent to np.where(x > 19, 19, x).
data['max.length_aspect_ratio'] = data['max.length_aspect_ratio'].clip(upper=19)
In [92]:
# Class counts per scaled_radius_of_gyration.1 value — used to choose an outlier cap
pd.crosstab(data['scaled_radius_of_gyration.1'],data['class'])
Out[92]:
class bus car van
scaled_radius_of_gyration.1
59.000000 0 1 0
60.000000 0 2 0
61.000000 0 9 2
62.000000 0 12 6
63.000000 0 16 8
64.000000 0 25 13
65.000000 3 20 8
66.000000 4 28 8
67.000000 5 35 13
68.000000 7 27 5
69.000000 15 20 9
70.000000 9 34 9
71.000000 13 43 12
72.000000 16 45 14
72.447743 1 3 0
73.000000 8 27 11
74.000000 12 29 12
75.000000 11 11 16
76.000000 7 8 10
77.000000 8 5 7
78.000000 7 2 8
79.000000 5 3 3
80.000000 11 4 3
81.000000 14 3 2
82.000000 11 4 2
83.000000 9 4 0
84.000000 5 1 1
85.000000 19 1 4
86.000000 6 3 3
87.000000 6 4 1
88.000000 4 0 1
89.000000 0 0 1
90.000000 0 0 2
91.000000 0 0 1
97.000000 0 0 1
99.000000 0 0 1
118.000000 0 0 1
119.000000 1 0 0
127.000000 1 0 0
135.000000 0 0 1
In [93]:
# From 89 upward the class pattern stays the same, so cap the outliers at 89.
# clip(upper=89) is equivalent to np.where(x > 89, 89, x).
data['scaled_radius_of_gyration.1'] = data['scaled_radius_of_gyration.1'].clip(upper=89)
In [94]:
# Class counts per skewness_about value — used to choose an outlier cap
pd.crosstab(data['skewness_about'],data['class'])
Out[94]:
class bus car van
skewness_about
0.000000 19 44 14
1.000000 22 38 21
2.000000 19 29 15
3.000000 17 26 13
4.000000 28 25 17
5.000000 22 35 14
6.000000 25 27 13
6.364286 4 2 0
7.000000 18 22 20
8.000000 15 19 13
9.000000 14 22 10
10.000000 6 21 9
11.000000 4 20 7
12.000000 2 18 10
13.000000 2 18 6
14.000000 0 14 4
15.000000 0 15 4
16.000000 0 10 1
17.000000 1 5 5
18.000000 0 4 2
19.000000 0 3 1
20.000000 0 3 0
21.000000 0 5 0
22.000000 0 4 0
In [95]:
# From 17 upward the class pattern stays the same, so cap the outliers at 17.
# clip(upper=17) is equivalent to np.where(x > 17, 17, x).
data['skewness_about'] = data['skewness_about'].clip(upper=17)
In [96]:
# Class counts per radius_ratio value — used to choose an outlier cap
pd.crosstab(data['radius_ratio'],data['class'])
Out[96]:
class bus car van
radius_ratio
104.0 0 1 0
105.0 0 0 1
109.0 0 0 1
110.0 0 1 2
111.0 0 1 3
112.0 0 0 1
113.0 2 0 2
114.0 0 1 3
115.0 0 2 2
116.0 2 3 2
117.0 0 1 3
118.0 1 1 0
119.0 1 2 2
120.0 7 1 1
121.0 2 3 3
122.0 3 2 0
123.0 6 2 2
124.0 1 0 2
125.0 5 2 6
126.0 3 1 4
127.0 1 5 1
128.0 4 2 0
129.0 1 2 2
130.0 5 5 2
131.0 0 2 4
132.0 1 3 1
133.0 1 3 7
134.0 0 1 2
135.0 1 2 2
136.0 1 5 5
... ... ... ...
211.0 0 8 0
212.0 0 5 0
213.0 1 7 0
214.0 0 2 0
215.0 0 5 0
216.0 1 2 0
217.0 0 1 0
218.0 0 2 0
219.0 1 5 0
220.0 0 4 0
221.0 0 4 0
222.0 0 5 0
223.0 1 2 0
224.0 0 2 0
225.0 0 4 0
226.0 1 0 0
227.0 1 1 0
228.0 0 5 0
230.0 0 4 0
231.0 0 5 1
232.0 0 1 0
234.0 0 2 0
235.0 1 0 0
238.0 1 0 0
246.0 1 0 1
250.0 0 0 1
252.0 1 0 0
306.0 0 0 1
322.0 0 0 1
333.0 0 0 1

135 rows × 3 columns

In [97]:
# From 235 upward the class pattern stays the same, so cap the outliers at 235.
# clip(upper=235) is equivalent to np.where(x > 235, 235, x).
data['radius_ratio'] = data['radius_ratio'].clip(upper=235)
In [98]:
#Now that the outliers have been capped, let us look at the box plots again,

data.boxplot(figsize=(40,15))


#The outliers have been handled to some extent.
Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x28e39668>

Bivariate Analysis

In [99]:
# Pairwise scatter plots with KDE diagonals to inspect feature relationships
sns.pairplot(data,diag_kind='kde')
Out[99]:
<seaborn.axisgrid.PairGrid at 0x2815feb8>
In [100]:
# Annotated correlation heatmap of the numeric features.
# The original called plt.subplot(1, 1, 1) first, which is redundant: a 1x1
# subplot is the whole figure, and sns.heatmap draws on the current axes anyway.
corr = data.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(corr, annot=True)
Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a8186d8>
Inference from the above bivariate analysis: compactness, circularity, distance_circularity, radius_ratio, pr.axis_rectangularity, max.length_rectangularity, scaled_variance, scaled_variance.1 and scaled_radius_of_gyration each have a negative correlation with elongatedness; scaled_radius_of_gyration.1 has a negative correlation with skewness_about.2 and with hollows_ratio. compactness has a positive correlation with scatter_ratio and with scaled_variance.1; circularity has a positive correlation with max.length_rectangularity, scaled_radius_of_gyration and pr.axis_rectangularity. We see that the columns scatter_ratio, elongatedness and pr.axis_rectangularity have strong correlations with several other columns, so we could remove them from the DataFrame; however, they are not correlated with every other column, so let us keep all columns and try SVM with them.

SVM Modelling - Without PCA

In [101]:
# Separate the predictors from the target label.
X = data.drop(columns='class')
y = data['class']
In [102]:
#Standardization of Data

def standardization(X_train, X_test):
    """Scale features to zero mean and unit variance.

    The scaler is fit on the training split only and then applied to both
    splits, so no test-set statistics leak into training.

    Returns the transformed (X_train, X_test) pair as numpy arrays.
    """
    scaler = preprocessing.StandardScaler()
    return scaler.fit_transform(X_train), scaler.transform(X_test)
In [103]:
#SVM

def svm_fun(X, y, gamma=0.025, C=3, test_size=0.30, random_state=1):
    """Train an SVC on a train/test split of (X, y) and print its performance.

    Parameters
    ----------
    X, y : predictors and class labels.
    gamma : RBF kernel coefficient — a measure of a data point's influence
        (inverse of the distance of influence); higher gamma fits the training
        data more tightly.
    C : regularization strength / model complexity — when C increases the
        margin shrinks; lower C gives a simpler hyper-surface, higher C a more
        complex one.
    test_size, random_state : forwarded to train_test_split.

    Returns
    -------
    (clf, svm_score) : the fitted classifier and its test accuracy.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    X_train, X_test = standardization(X_train, X_test)

    clf = svm.SVC(gamma=gamma, C=C)
    clf.fit(X_train, y_train)

    svm_pred = clf.predict(X_test)
    svm_score = clf.score(X_test, y_test)
    # Bug fix: the message previously said "KNN" although the model is an SVM.
    print("The SVM model prediction is " + str(svm_score*100) + "%")

    print("The confusion matrix is ")
    print(metrics.confusion_matrix(y_test,svm_pred))
    print("the Classification report is")
    print(metrics.classification_report(y_test,svm_pred))
    # roc_auc_score on raw multi-class predictions raises; it would need
    # probability scores and multi-class handling, so it stays disabled.
    #roc=roc_auc_score(y_test, svm_pred)
    #print("ROC value for svm model is "+ str(roc*100) + "%")
    return clf, svm_score
In [104]:
#SVM on all 18 original features (no PCA)

svm_fun(X,y)
The KNN model prediction is 96.8503937007874%
The confusion matrix is 
[[ 58   1   0]
 [  1 129   3]
 [  2   1  59]]
the Classification report is
              precision    recall  f1-score   support

         bus       0.95      0.98      0.97        59
         car       0.98      0.97      0.98       133
         van       0.95      0.95      0.95        62

   micro avg       0.97      0.97      0.97       254
   macro avg       0.96      0.97      0.97       254
weighted avg       0.97      0.97      0.97       254

SVM Modelling - With PCA

In [105]:
# Separate predictors/target again for the PCA-based pipeline.
X_pcm = data.drop(columns='class')
y_pcm = data['class']
In [106]:
# Transform (centralize) the entire X (independent variable data) to z-scores;
# the PCA dimensions will be created on this distribution.

# Covariance is computed only on the independent variables.
sc = preprocessing.StandardScaler()
X_std = sc.fit_transform(X_pcm)
cov_matrix = np.cov(X_std.T)
# Bug fix: the original passed cov_matrix as a second argument to print(),
# so the literal '%s' was printed instead of being substituted.
print('Covariance Matrix \n%s' % cov_matrix)
Covariance Matrix 
%s [[ 1.00118343e+00  6.86232515e-01  7.90844121e-01  7.19486607e-01
   1.69249223e-01  3.75112934e-01  8.13196231e-01 -7.89575871e-01
   8.14598883e-01  6.76943342e-01  7.63672647e-01  8.14975660e-01
   5.85848651e-01 -2.65007546e-01  2.28037252e-01  1.57573159e-01
   2.98879698e-01  3.65984455e-01]
 [ 6.86232515e-01  1.00118343e+00  7.93953986e-01  6.39617688e-01
   1.98320397e-01  4.70814297e-01  8.49210579e-01 -8.22873474e-01
   8.45971641e-01  9.63080942e-01  7.97764504e-01  8.37974499e-01
   9.27985236e-01  5.57001522e-02  1.44409584e-01 -1.18652734e-02
  -1.05769799e-01  4.53716437e-02]
 [ 7.90844121e-01  7.93953986e-01  1.00118343e+00  7.94643227e-01
   2.30155096e-01  5.40884263e-01  9.05470613e-01 -9.12513680e-01
   8.94185135e-01  7.75586236e-01  8.63000311e-01  8.87088703e-01
   7.06788350e-01 -2.48315297e-01  1.10995350e-01  2.65867562e-01
   1.45734974e-01  3.32488398e-01]
 [ 7.19486607e-01  6.39617688e-01  7.94643227e-01  1.00118343e+00
   6.75408459e-01  4.74841250e-01  7.65389977e-01 -8.22197108e-01
   7.39157474e-01  5.83257155e-01  7.94400897e-01  7.49883755e-01
   5.51956783e-01 -3.41762109e-01  4.89058971e-02  1.81266781e-01
   4.02511849e-01  4.91521317e-01]
 [ 1.69249223e-01  1.98320397e-01  2.30155096e-01  6.75408459e-01
   1.00118343e+00  2.60523123e-01  1.75041099e-01 -2.77912342e-01
   1.44113370e-01  1.50207734e-01  2.40562828e-01  1.58790605e-01
   1.49150617e-01 -2.43035635e-01 -5.88189601e-02 -2.74382474e-02
   3.71647245e-01  3.92466689e-01]
 [ 3.75112934e-01  4.70814297e-01  5.40884263e-01  4.74841250e-01
   2.60523123e-01  1.00118343e+00  3.82695659e-01 -4.00532549e-01
   3.78092273e-01  5.52229026e-01  4.03166544e-01  3.44659777e-01
   3.38198509e-01 -1.75195592e-01  6.09547956e-02  1.12455614e-01
   3.33064584e-02  3.24737565e-01]
 [ 8.13196231e-01  8.49210579e-01  9.05470613e-01  7.65389977e-01
   1.75041099e-01  3.82695659e-01  1.00118343e+00 -9.71871689e-01
   9.90540752e-01  8.09312247e-01  9.49418500e-01  9.93679002e-01
   8.00211740e-01 -1.57715266e-02  7.27104291e-02  2.13378998e-01
   5.17279380e-03  1.18588382e-01]
 [-7.89575871e-01 -8.22873474e-01 -9.12513680e-01 -8.22197108e-01
  -2.77912342e-01 -4.00532549e-01 -9.71871689e-01  1.00118343e+00
  -9.50200397e-01 -7.76436962e-01 -9.37823119e-01 -9.54893241e-01
  -7.66935431e-01  1.04941300e-01 -5.10820899e-02 -1.85911029e-01
  -1.14863270e-01 -2.16975309e-01]
 [ 8.14598883e-01  8.45971641e-01  8.94185135e-01  7.39157474e-01
   1.44113370e-01  3.78092273e-01  9.90540752e-01 -9.50200397e-01
   1.00118343e+00  8.12406876e-01  9.35673866e-01  9.89490977e-01
   7.98010831e-01  5.63103400e-04  8.15028751e-02  2.14988382e-01
  -1.90119932e-02  9.93087860e-02]
 [ 6.76943342e-01  9.63080942e-01  7.75586236e-01  5.83257155e-01
   1.50207734e-01  5.52229026e-01  8.09312247e-01 -7.76436962e-01
   8.12406876e-01  1.00118343e+00  7.46091125e-01  7.95554917e-01
   8.67449912e-01  4.26296973e-02  1.36123010e-01  1.66021803e-03
  -1.04377120e-01  7.68604682e-02]
 [ 7.63672647e-01  7.97764504e-01  8.63000311e-01  7.94400897e-01
   2.40562828e-01  4.03166544e-01  9.49418500e-01 -9.37823119e-01
   9.35673866e-01  7.46091125e-01  1.00118343e+00  9.47043445e-01
   7.79896606e-01  5.53825862e-02  3.52017378e-02  1.95490628e-01
   1.44350314e-02  8.66965362e-02]
 [ 8.14975660e-01  8.37974499e-01  8.87088703e-01  7.49883755e-01
   1.58790605e-01  3.44659777e-01  9.93679002e-01 -9.54893241e-01
   9.89490977e-01  7.95554917e-01  9.47043445e-01  1.00118343e+00
   7.95904030e-01 -5.09890473e-04  7.48939120e-02  2.01379239e-01
   6.04953293e-03  1.03057140e-01]
 [ 5.85848651e-01  9.27985236e-01  7.06788350e-01  5.51956783e-01
   1.49150617e-01  3.38198509e-01  8.00211740e-01 -7.66935431e-01
   7.98010831e-01  8.67449912e-01  7.79896606e-01  7.95904030e-01
   1.00118343e+00  2.07455809e-01  1.68749747e-01 -5.60390207e-02
  -2.25131646e-01 -1.18297105e-01]
 [-2.65007546e-01  5.57001522e-02 -2.48315297e-01 -3.41762109e-01
  -2.43035635e-01 -1.75195592e-01 -1.57715266e-02  1.04941300e-01
   5.63103400e-04  4.26296973e-02  5.53825862e-02 -5.09890473e-04
   2.07455809e-01  1.00118343e+00 -9.04242206e-02 -1.33445120e-01
  -8.33620817e-01 -8.97251341e-01]
 [ 2.28037252e-01  1.44409584e-01  1.10995350e-01  4.89058971e-02
  -5.88189601e-02  6.09547956e-02  7.27104291e-02 -5.10820899e-02
   8.15028751e-02  1.36123010e-01  3.52017378e-02  7.48939120e-02
   1.68749747e-01 -9.04242206e-02  1.00118343e+00 -3.65086244e-02
   1.08161502e-01  8.80836542e-02]
 [ 1.57573159e-01 -1.18652734e-02  2.65867562e-01  1.81266781e-01
  -2.74382474e-02  1.12455614e-01  2.13378998e-01 -1.85911029e-01
   2.14988382e-01  1.66021803e-03  1.95490628e-01  2.01379239e-01
  -5.60390207e-02 -1.33445120e-01 -3.65086244e-02  1.00118343e+00
   7.75191893e-02  2.05332710e-01]
 [ 2.98879698e-01 -1.05769799e-01  1.45734974e-01  4.02511849e-01
   3.71647245e-01  3.33064584e-02  5.17279380e-03 -1.14863270e-01
  -1.90119932e-02 -1.04377120e-01  1.44350314e-02  6.04953293e-03
  -2.25131646e-01 -8.33620817e-01  1.08161502e-01  7.75191893e-02
   1.00118343e+00  8.93896286e-01]
 [ 3.65984455e-01  4.53716437e-02  3.32488398e-01  4.91521317e-01
   3.92466689e-01  3.24737565e-01  1.18588382e-01 -2.16975309e-01
   9.93087860e-02  7.68604682e-02  8.66965362e-02  1.03057140e-01
  -1.18297105e-01 -8.97251341e-01  8.80836542e-02  2.05332710e-01
   8.93896286e-01  1.00118343e+00]]
In [107]:
# Eigendecomposition of the covariance matrix: the eigenvectors are the
# rotated (principal) axes, the eigenvalues the variance along each axis.

eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
# Bug fix: use '%' formatting — the original printed the literal '%s'.
print('Eigen Vectors \n%s' % eigenvectors)
print('\n Eigen Values \n%s' % eigenvalues)
Eigen Vectors 
%s [[-2.73062440e-01 -9.40568902e-02 -7.10071869e-02  2.07657112e-01
   1.27667189e-01  7.16059806e-02  3.22979382e-01  7.01126279e-01
  -3.76976126e-01 -2.72322082e-01 -5.23164950e-02  7.09257830e-02
   1.31556754e-01 -1.10566177e-02  5.18610885e-02 -8.25829065e-02
  -1.10706613e-02 -2.71010754e-03]
 [-2.89221228e-01  1.29789421e-01  1.95220869e-01  5.78945487e-02
  -7.02771479e-02  1.00857154e-01 -3.96169853e-01  1.20024075e-01
  -1.68236068e-02  6.01059106e-02  1.89813253e-04 -2.72320152e-01
  -5.08902076e-03 -5.34338818e-03  2.41683420e-01  1.16769072e-01
  -7.16678872e-01  8.82748206e-02]
 [-3.02882713e-01 -5.17448623e-02 -1.01442970e-01  3.03896999e-02
  -1.00064881e-01  6.27445098e-02  4.56330691e-02 -3.61624151e-01
  -2.56764053e-01 -1.41312698e-01 -7.63794554e-01  7.74528568e-02
  -9.02263456e-02  1.14352379e-02 -1.83446633e-01  1.66907769e-01
  -4.97322523e-02 -5.67582038e-03]
 [-2.72050113e-01 -2.00697297e-01  9.22357860e-02 -2.33514525e-01
   8.56490857e-02 -2.34084768e-01  1.33889801e-01 -6.54323926e-02
  -2.14100475e-02 -1.63427047e-01  9.90895344e-02 -3.60063989e-01
  -3.07143517e-01 -3.41539994e-02  4.75020872e-01  3.98826406e-01
   2.99025677e-01 -4.55479218e-02]
 [-9.72149355e-02 -2.45480130e-01  3.95609615e-01 -5.03628436e-01
   7.21966902e-02 -5.23012114e-01 -1.12530463e-01  8.81161391e-02
  -2.34835678e-01  1.15674924e-01 -2.67968716e-02  1.52270746e-01
   1.11197421e-01  1.97537659e-02 -2.68040653e-01 -1.81570972e-01
  -1.03514074e-01  1.55439338e-02]
 [-1.65903450e-01 -8.42548836e-02  2.05165196e-01 -8.19369447e-02
  -8.11922821e-01  9.21630714e-02  3.77317669e-01 -2.98006542e-03
   1.52385948e-01 -5.69336717e-02  1.79674453e-01  3.78065792e-02
   1.36643295e-01 -7.67724122e-03 -1.11106729e-01  1.05695307e-01
  -6.14355552e-02 -1.68674051e-02]
 [-3.12052343e-01  6.96154134e-02 -1.27664066e-01 -2.47077911e-03
   1.10741077e-01  1.89593176e-02  8.28322042e-02 -1.05678901e-01
  -2.10623708e-02  2.00707771e-01  1.86019300e-01  1.17259483e-01
   4.39653786e-02  8.24936312e-01 -1.62934459e-02  5.03980634e-02
  -3.28618430e-02 -2.74594521e-01]
 [ 3.10342884e-01 -6.74739302e-03  7.66104737e-02  5.64183412e-02
  -1.30613885e-01 -1.23671249e-02 -7.19870414e-02  2.37462640e-01
  -9.35884845e-02 -1.94822638e-01  3.79020953e-02 -1.30519373e-01
  -7.12377802e-01  2.37614654e-01 -4.00840639e-01  1.09796602e-01
  -9.72058695e-02 -6.70754179e-02]
 [-3.08998473e-01  8.23048171e-02 -1.36800002e-01  1.90853610e-02
   9.95815892e-02  2.85869818e-02  8.48217844e-02 -5.68392744e-02
  -7.43565478e-02  2.33027182e-01  2.67813442e-01  1.85248747e-01
  -2.13022772e-01 -7.47014044e-02 -2.27756112e-01  1.79877610e-01
   3.90291675e-02  7.44933180e-01]
 [-2.79535714e-01  1.20008752e-01  1.90555410e-01  8.31143012e-02
  -1.97699364e-01  1.74749776e-01 -3.56612067e-01  2.63407274e-01
  -9.06702606e-03  4.23099065e-01 -1.63858398e-01 -2.41461528e-01
  -5.89213080e-02 -1.62798811e-02 -9.97324216e-02 -1.61295955e-01
   5.37985991e-01 -6.84350464e-02]
 [-3.02014951e-01  7.16405075e-02 -9.45556037e-02 -9.41267604e-02
   1.11411688e-01 -6.60360591e-02  2.30515721e-01 -8.32369839e-02
   4.00553549e-01 -2.21894760e-01 -5.71197011e-02 -3.57753374e-01
  -1.67910823e-01  2.07195523e-02 -1.25880802e-01 -6.37500734e-01
  -8.51726226e-02  9.97663769e-02]
 [-3.08502391e-01  7.66043290e-02 -1.34503638e-01  8.64261703e-03
   1.52500016e-01  2.07706958e-02  9.28108075e-02 -6.09110028e-02
   1.22858658e-03  1.96846372e-01  2.41024784e-01  1.33433616e-01
  -2.05727841e-01 -5.02524057e-01 -2.58956673e-01  9.23493020e-02
  -1.44045050e-01 -5.82187834e-01]
 [-2.65986295e-01  2.09046426e-01  2.01294389e-01  6.07890257e-02
   3.53978686e-02 -2.47708299e-03 -4.22194177e-01 -4.58210099e-02
   1.92245062e-01 -6.39418952e-01  1.66845657e-01  3.45565540e-01
   7.10655692e-02  8.46413729e-03 -1.03876346e-01  7.49496843e-02
   2.11022071e-01 -2.45298994e-02]
 [ 3.99777786e-02  4.97998790e-01  2.84276523e-02 -1.57815684e-01
   1.08643779e-02 -2.71211767e-01  1.90322958e-01  3.39040094e-01
   4.27710661e-01  1.57545046e-01 -3.53440645e-01  2.93516160e-01
  -1.21337798e-01  1.29159215e-02  1.59009821e-01  1.98023340e-01
  -4.02904645e-02  9.70992552e-04]
 [-4.03056996e-02 -3.43862110e-02  2.16477132e-01  7.56692958e-01
  -4.72771224e-02 -5.80378823e-01  7.93132579e-02 -1.43936606e-01
   2.13091549e-02  9.38608617e-02  1.38813588e-02 -2.04199290e-02
  -9.85437600e-03 -2.89432213e-03  1.40527865e-02 -3.85355691e-02
  -1.23854995e-03 -5.62140810e-04]
 [-5.85868454e-02 -9.93902057e-02 -7.39314508e-01 -6.06482461e-02
  -3.34188647e-01 -4.10747307e-01 -3.42147409e-01  1.59044278e-01
   2.55221796e-02 -4.81794933e-02  3.32633511e-02 -5.44337656e-02
   7.63420583e-02 -1.39736658e-02 -1.68577036e-02  1.68287075e-02
   2.06305503e-03 -5.70406714e-03]
 [-3.58888627e-02 -5.09959590e-01  2.10809783e-02  9.41711360e-02
   2.43960517e-01  8.40534275e-02 -3.36920204e-02  1.89540232e-01
   5.31097744e-01  4.55639510e-02 -1.29786343e-01 -1.42997955e-01
   2.21830881e-01  4.32771468e-02 -3.40782109e-01  3.67863919e-01
  -3.46495344e-02  2.29277896e-02]
 [-8.17054269e-02 -5.13906878e-01  1.95331356e-03  6.77588281e-02
  -9.04630151e-02  1.45234340e-01 -1.28725614e-01  3.73382817e-02
   1.85815730e-01  1.11822106e-01 -6.47359248e-02  5.11817311e-01
  -3.81394677e-01  1.43827603e-03  3.70216694e-01 -2.80461926e-01
  -5.50021599e-02 -2.57702450e-05]]

 Eigen Values 
%s [9.64281175e+00 3.29287200e+00 1.20222050e+00 1.16285237e+00
 8.91370105e-01 7.40775369e-01 3.69020767e-01 2.22901706e-01
 1.54288031e-01 9.64464125e-02 6.51703662e-02 5.19653645e-02
 3.92693666e-02 3.77106640e-03 3.17033133e-02 2.46471372e-02
 1.90939370e-02 1.01222167e-02]
In [108]:
# Step 3 (continued): Sort eigenvalues in descending order

# Make a set of (eigenvalue, eigenvector) pairs
eig_pairs = [(eigenvalues[index], eigenvectors[:, index]) for index in range(len(eigenvalues))]

# Sort descending by eigenvalue only. Robustness fix: plain tuple sorting
# falls back to comparing the eigenvector arrays when two eigenvalues tie,
# which raises on numpy arrays; key= avoids that, and reverse=True replaces
# the separate sort()+reverse() calls.
eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
print(eig_pairs)

# Extract the descending ordered eigenvalues and eigenvectors
eigvalues_sorted = [pair[0] for pair in eig_pairs]
eigvectors_sorted = [pair[1] for pair in eig_pairs]

# Let's confirm our sorting worked, print out eigenvalues
print('Eigenvalues in descending order: \n%s' %eigvalues_sorted)
[(9.642811747181636, array([-0.27306244, -0.28922123, -0.30288271, -0.27205011, -0.09721494,
       -0.16590345, -0.31205234,  0.31034288, -0.30899847, -0.27953571,
       -0.30201495, -0.30850239, -0.2659863 ,  0.03997778, -0.0403057 ,
       -0.05858685, -0.03588886, -0.08170543])), (3.2928720016853212, array([-0.09405689,  0.12978942, -0.05174486, -0.2006973 , -0.24548013,
       -0.08425488,  0.06961541, -0.00674739,  0.08230482,  0.12000875,
        0.07164051,  0.07660433,  0.20904643,  0.49799879, -0.03438621,
       -0.09939021, -0.50995959, -0.51390688])), (1.2022205017821275, array([-0.07100719,  0.19522087, -0.10144297,  0.09223579,  0.39560961,
        0.2051652 , -0.12766407,  0.07661047, -0.1368    ,  0.19055541,
       -0.0945556 , -0.13450364,  0.20129439,  0.02842765,  0.21647713,
       -0.73931451,  0.02108098,  0.00195331])), (1.162852366184379, array([ 0.20765711,  0.05789455,  0.0303897 , -0.23351452, -0.50362844,
       -0.08193694, -0.00247078,  0.05641834,  0.01908536,  0.0831143 ,
       -0.09412676,  0.00864262,  0.06078903, -0.15781568,  0.75669296,
       -0.06064825,  0.09417114,  0.06775883])), (0.891370105349998, array([ 0.12766719, -0.07027715, -0.10006488,  0.08564909,  0.07219669,
       -0.81192282,  0.11074108, -0.13061389,  0.09958159, -0.19769936,
        0.11141169,  0.15250002,  0.03539787,  0.01086438, -0.04727712,
       -0.33418865,  0.24396052, -0.09046302])), (0.7407753692342879, array([ 0.07160598,  0.10085715,  0.06274451, -0.23408477, -0.52301211,
        0.09216307,  0.01895932, -0.01236712,  0.02858698,  0.17474978,
       -0.06603606,  0.0207707 , -0.00247708, -0.27121177, -0.58037882,
       -0.41074731,  0.08405343,  0.14523434])), (0.36902076676401835, array([ 0.32297938, -0.39616985,  0.04563307,  0.1338898 , -0.11253046,
        0.37731767,  0.0828322 , -0.07198704,  0.08482178, -0.35661207,
        0.23051572,  0.09281081, -0.42219418,  0.19032296,  0.07931326,
       -0.34214741, -0.03369202, -0.12872561])), (0.22290170551189684, array([ 0.70112628,  0.12002408, -0.36162415, -0.06543239,  0.08811614,
       -0.00298007, -0.1056789 ,  0.23746264, -0.05683927,  0.26340727,
       -0.08323698, -0.060911  , -0.04582101,  0.33904009, -0.14393661,
        0.15904428,  0.18954023,  0.03733828])), (0.15428803099667376, array([-0.37697613, -0.01682361, -0.25676405, -0.02141005, -0.23483568,
        0.15238595, -0.02106237, -0.09358848, -0.07435655, -0.00906703,
        0.40055355,  0.00122859,  0.19224506,  0.42771066,  0.02130915,
        0.02552218,  0.53109774,  0.18581573])), (0.09644641245537748, array([-0.27232208,  0.06010591, -0.1413127 , -0.16342705,  0.11567492,
       -0.05693367,  0.20070777, -0.19482264,  0.23302718,  0.42309906,
       -0.22189476,  0.19684637, -0.63941895,  0.15754505,  0.09386086,
       -0.04817949,  0.04556395,  0.11182211])), (0.0651703661798928, array([-5.23164950e-02,  1.89813253e-04, -7.63794554e-01,  9.90895344e-02,
       -2.67968716e-02,  1.79674453e-01,  1.86019300e-01,  3.79020953e-02,
        2.67813442e-01, -1.63858398e-01, -5.71197011e-02,  2.41024784e-01,
        1.66845657e-01, -3.53440645e-01,  1.38813588e-02,  3.32633511e-02,
       -1.29786343e-01, -6.47359248e-02])), (0.05196536450373262, array([ 0.07092578, -0.27232015,  0.07745286, -0.36006399,  0.15227075,
        0.03780658,  0.11725948, -0.13051937,  0.18524875, -0.24146153,
       -0.35775337,  0.13343362,  0.34556554,  0.29351616, -0.02041993,
       -0.05443377, -0.14299796,  0.51181731])), (0.03926936662778677, array([ 0.13155675, -0.00508902, -0.09022635, -0.30714352,  0.11119742,
        0.1366433 ,  0.04396538, -0.7123778 , -0.21302277, -0.05892131,
       -0.16791082, -0.20572784,  0.07106557, -0.1213378 , -0.00985438,
        0.07634206,  0.22183088, -0.38139468])), (0.03170331333613729, array([ 0.05186109,  0.24168342, -0.18344663,  0.47502087, -0.26804065,
       -0.11110673, -0.01629345, -0.40084064, -0.22775611, -0.09973242,
       -0.1258808 , -0.25895667, -0.10387635,  0.15900982,  0.01405279,
       -0.0168577 , -0.34078211,  0.37021669])), (0.024647137215298633, array([-0.08258291,  0.11676907,  0.16690777,  0.39882641, -0.18157097,
        0.10569531,  0.05039806,  0.1097966 ,  0.17987761, -0.16129596,
       -0.63750073,  0.0923493 ,  0.07494968,  0.19802334, -0.03853557,
        0.01682871,  0.36786392, -0.28046193])), (0.01909393701574852, array([-0.01107066, -0.71667887, -0.04973225,  0.29902568, -0.10351407,
       -0.06143556, -0.03286184, -0.09720587,  0.03902917,  0.53798599,
       -0.08517262, -0.14404505,  0.21102207, -0.04029046, -0.00123855,
        0.00206306, -0.03464953, -0.05500216])), (0.010122216722890707, array([-2.71010754e-03,  8.82748206e-02, -5.67582038e-03, -4.55479218e-02,
        1.55439338e-02, -1.68674051e-02, -2.74594521e-01, -6.70754179e-02,
        7.44933180e-01, -6.84350464e-02,  9.97663769e-02, -5.82187834e-01,
       -2.45298994e-02,  9.70992552e-04, -5.62140810e-04, -5.70406714e-03,
        2.29277896e-02, -2.57702450e-05])), (0.0037710664007248517, array([-0.01105662, -0.00534339,  0.01143524, -0.034154  ,  0.01975377,
       -0.00767724,  0.82493631,  0.23761465, -0.0747014 , -0.01627988,
        0.02071955, -0.50252406,  0.00846414,  0.01291592, -0.00289432,
       -0.01397367,  0.04327715,  0.00143828]))]
Eigenvalues in descending order: 
[9.642811747181636, 3.2928720016853212, 1.2022205017821275, 1.162852366184379, 0.891370105349998, 0.7407753692342879, 0.36902076676401835, 0.22290170551189684, 0.15428803099667376, 0.09644641245537748, 0.0651703661798928, 0.05196536450373262, 0.03926936662778677, 0.03170331333613729, 0.024647137215298633, 0.01909393701574852, 0.010122216722890707, 0.0037710664007248517]
In [109]:
# Fraction of total variance explained by each principal component (one entry
# per eigenvector), plus the running cumulative total (the last entry of the
# cumulative array reaches ~100%).
tot = sum(eigenvalues)
var_explained = [val / tot for val in sorted(eigenvalues, reverse=True)]
cum_var_exp = np.cumsum(var_explained)
In [110]:
# Scree plot over the 18 components (range(1, 19) — one bar per eigenvalue
# of the covariance matrix), showing individual and cumulative variance.
plt.figure(figsize=(20, 10))
component_idx = range(1, 19)
plt.bar(component_idx, var_explained, alpha=0.5, align='center',
        label='individual explained variance')
plt.step(component_idx, cum_var_exp, where='mid',
         label='cumulative explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.show()
In [111]:
# P_reduce is the basis of the reduced space: the top-13 sorted eigenvectors.
# The scree plot above suggested 10-16 components; 13 was chosen after
# trying values in that range.
P_reduce = np.array(eigvectors_sorted[:13])   # shape: 13 x 18

# Project the standardized data onto the 13 principal-component directions.
X_std_13D = X_std @ P_reduce.T

# Wrap the projected array in a DataFrame (convenient for pairplot/modelling).
Proj_data_df = pd.DataFrame(X_std_13D)
In [112]:
# Visual sanity check: pairwise scatter plots of the 13 principal components,
# with KDE curves on the diagonal.

sns.pairplot(Proj_data_df, diag_kind='kde') 
Out[112]:
<seaborn.axisgrid.PairGrid at 0x37460048>
In [113]:
# Evaluate the SVM (via the svm_fun helper defined earlier in the notebook)
# on the 13-component PCA projection; y_pcm holds the class labels.
# NOTE(review): the helper's printed report says "The KNN model prediction" —
# presumably a label left over from a copied template; confirm svm_fun
# actually fits an SVM.

svm_fun(Proj_data_df, y_pcm)
The KNN model prediction is 98.03149606299213%
The confusion matrix is 
[[ 58   1   0]
 [  1 130   2]
 [  1   0  61]]
the Classification report is
              precision    recall  f1-score   support

         bus       0.97      0.98      0.97        59
         car       0.99      0.98      0.98       133
         van       0.97      0.98      0.98        62

   micro avg       0.98      0.98      0.98       254
   macro avg       0.98      0.98      0.98       254
weighted avg       0.98      0.98      0.98       254

Inferences:

Comparing the models with and without PCA, both the overall model performance and the per-class performance improved when PCA was applied. Next, let's see whether hyper-parameter tuning improves it further.
In [114]:
  #RandomizedSearchCV - SVM
#Implement Hyperparameter

def hyper_params_svm(X=None, y=None):
    """Build and return the SVM hyper-parameter grid for randomized search.

    Parameters
    ----------
    X, y : optional, unused
        Accepted (with None defaults) only for backward compatibility with
        existing callers that pass the data; the grid does not depend on them.

    Returns
    -------
    dict
        Mapping of SVC parameter name -> list of candidate values, suitable
        for RandomizedSearchCV's ``param_distributions``. The grid is also
        pretty-printed as a side effect.
    """
    # gamma: kernel coefficient for non-linear kernels; the higher the gamma,
    # the more tightly the model tries to fit the training set.
    gammas = [0.1, 1, 10, 100]
    # kernel: type of hyperplane used to separate the data. 'linear' uses a
    # linear hyperplane (a line for 2D data); 'rbf' and 'poly' are non-linear.
    kernels = ['linear', 'rbf', 'poly']
    # C: penalty parameter of the error term; trades off a smooth decision
    # boundary against classifying all training points correctly.
    cs = [0.1, 1, 10, 100, 1000]
    # degree: polynomial degree, only used when kernel='poly'.
    degrees = [0, 1, 2, 3, 4, 5, 6]

    # Assemble the random search grid.
    random_grid = {'gamma': gammas,
                   'kernel': kernels,
                   'C': cs,
                   'degree': degrees}

    pprint(random_grid)
    return random_grid

def randomizedsearch_svm(X, y):
    """Tune an SVM with RandomizedSearchCV and report held-out performance.

    Splits (X, y) 70/30, standardizes the features, runs a randomized search
    (100 candidates, 3-fold CV) over the grid from hyper_params_svm, then
    prints the best parameters, test accuracy, confusion matrix and
    classification report for the 30% hold-out set.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=1)
    # Scale features (statistics taken from the training split).
    X_train, X_test = standardization(X_train, X_test)

    # Base estimator to tune; fixed random_state for reproducibility.
    base_svm = svm.SVC(random_state=1)

    # Randomized search: 100 parameter combinations, 3-fold cross validation,
    # all available cores.
    search = RandomizedSearchCV(estimator=base_svm,
                                param_distributions=hyper_params_svm(X, y),
                                n_iter=100, cv=3, verbose=2,
                                random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    print("Best Hyper Parameters:", search.best_params_)

    # Evaluate the refit best estimator on the hold-out set.
    pred = search.predict(X_test)
    score = search.score(X_test, y_test)
    print("The model prediction is " + str(score * 100) + "%")
    print("The confusion matrix is ")
    print(metrics.confusion_matrix(y_test, pred))
    print("the Classification report is")
    print(metrics.classification_report(y_test, pred))
In [115]:
# Run the randomized hyper-parameter search for the SVM on the
# 13-component PCA projection and its class labels.

randomizedsearch_svm(Proj_data_df, y_pcm)
{'C': [0.1, 1, 10, 100, 1000],
 'degree': [0, 1, 2, 3, 4, 5, 6],
 'gamma': [0.1, 1, 10, 100],
 'kernel': ['linear', 'rbf', 'poly']}
Fitting 3 folds for each of 100 candidates, totalling 300 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:   12.8s
Best Hyper Parameters: {'kernel': 'rbf', 'gamma': 0.1, 'degree': 5, 'C': 1}
The model prediction is 96.06299212598425%
The confusion matrix is 
[[ 58   1   0]
 [  0 130   3]
 [  1   5  56]]
the Classification report is
              precision    recall  f1-score   support

         bus       0.98      0.98      0.98        59
         car       0.96      0.98      0.97       133
         van       0.95      0.90      0.93        62

   micro avg       0.96      0.96      0.96       254
   macro avg       0.96      0.95      0.96       254
weighted avg       0.96      0.96      0.96       254

[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  1.5min finished
Inference: We see that even hyper-parameter tuning does not improve the model's performance compared to the simple PCA-based SVM.